Let's review the MLB draft season.

Start by loading the packages

suppressMessages({
  library(tidyverse) #ggplot2 dplyr tibble tidyr purrr forecats 
  library(ggrepel) #automatically position non-overlapping text labels
  library(glue) #interpreted literal strings
  library(gt)
  library(gtExtras)
  library(paletteer)
  library(mlbplotR)
})

Next we load the team logos

# Team lookup table from mlbplotR with the league-level rows ("AL", "NL",
# "MLB") removed, plus four helper columns:
#   a     - cycles 1..6 across the rows
#   b     - counts down 5..1 in runs of six
#           (a/b are presumably grid coordinates for a 6 x 5 logo layout --
#           TODO confirm against the plotting code)
#   alpha - 1 when the abbreviation contains "A", otherwise 0.75
#   color - "b/w" when the abbreviation contains "E", otherwise NA
# The helper vectors have length 30, so exactly 30 teams must remain after the
# filter.
teams_colors_logos <- mlbplotR::load_mlb_teams() %>%
  filter(!team_abbr %in% c("AL", "NL", "MLB")) %>%
  mutate(
    a = rep(1:6, times = 5),
    # same values as sort(rep(1:5, 6), decreasing = TRUE), without the sort
    b = rep(5:1, each = 6),
    alpha = ifelse(grepl("A", team_abbr), 1, 0.75),
    color = ifelse(grepl("E", team_abbr), "b/w", NA)
  )

Now let's load our draft data and clean it up.

# Build `exposure`: one row per drafted player per draft, enriched with team
# logo, headshot, ADP / projection data and pick-value metrics.
exposure <- read.csv("./data/exposure_mar30.csv")

exposure <- exposure %>%
  mutate(
    # Refer to the column through the data mask (not `exposure$Picked.At`) so
    # the expression always operates on the rows flowing through the pipeline.
    Picked.At = as.Date(
      as.POSIXct(Picked.At, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
    ),
    name = paste(First.Name, Last.Name)
  ) %>%
  select(name, Team, Position, Picked.At, Pick.Number, Draft) %>%
  # team logo, matched on the team abbreviation
  left_join(
    teams_colors_logos %>% select(team_abbr, team_logo_espn),
    by = c("Team" = "team_abbr")
  ) %>%
  # player ids, matched on the full name; a name may match several id rows
  left_join(read.csv("./data/playerids.csv"), by = c("name" = "Name")) %>%
  # non-numeric ids become NA (R warns about the coercion)
  mutate(playerid = as.double(playerid)) %>%
  # Keep the first row per player and draft. Using the two key columns
  # directly avoids the collisions a pasted name+Draft string could produce.
  distinct(name, Draft, .keep_all = TRUE) %>%
  # headshot URL, matched on the FanGraphs id
  left_join(
    mlbplotR::load_headshots() %>%
      select(fangraphs_id, espn_headshot) %>%
      drop_na(fangraphs_id),
    by = c("playerid" = "fangraphs_id")
  ) %>%
  # ADP, projected points and position rank, matched on the full name
  left_join(
    read.csv("./projections_season/rankings_mar20.csv") %>%
      mutate(
        name = paste(firstName, lastName),
        adp = as.numeric(adp)
      ) %>%
      select(name, adp, projectedPoints, positionRank),
    by = c("name")
  ) %>%
  mutate(
    # positive value = the player was picked later than his ADP
    value = Pick.Number - adp,
    rel_value = round(value / adp, digits = 2),
    # keep only the capital letters of the position rank (strips the number)
    positionGroup = gsub("[^A-Z]", "", positionRank)
  ) %>%
  # picks without an ADP cannot be valued, so they are dropped
  drop_na(adp) %>%
  arrange(Pick.Number)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Highest owned players

# Ten most frequently drafted players: number of picks per player and that
# count divided by the number of distinct drafts in `exposure` (`own`).
exposure %>%
  group_by(name, espn_headshot) %>%
  # drop the grouping explicitly instead of relying on summarise()'s default
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count)) %>%
  mutate(own = round(count / n_distinct(exposure$Draft), digits = 2)) %>%
  slice_head(n = 10) %>%
  gt() %>%
  # render the headshot URL column as an image
  gt_img_rows(columns = espn_headshot, height = 50) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.
name espn_headshot count own
Mark Canha 20 0.28
J.D. Martinez 19 0.26
Willson Contreras 18 0.25
Jake Cronenworth 17 0.24
Lourdes Gurriel Jr. 17 0.24
Tommy Edman 15 0.21
Aaron Nola 14 0.19
Austin Hays 14 0.19
Justin Turner 14 0.19
Alek Manoah 13 0.18

Let's look at the drafts grouped by date.

# Per pick date: number of picks, summed value and relative value, and the
# per-pick averages of both (rounded to two decimals).
drafts_by_date <- exposure %>%
  group_by(Picked.At) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value, na.rm = TRUE),
    total_rel_value = sum(rel_value, na.rm = TRUE)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  )

drafts_by_date %>% gt() %>% gt_theme_dark()
Picked.At total_picks total_value total_rel_value value_per_pick rel_value_per_pick
2023-02-19 20 -21.0 0.25 -1.05 0.01
2023-02-21 172 -399.5 -0.65 -2.32 0.00
2023-02-22 111 232.2 7.02 2.09 0.06
2023-02-23 177 -26.8 0.50 -0.15 0.00
2023-02-25 120 118.6 3.09 0.99 0.03
2023-02-26 20 -29.3 0.04 -1.46 0.00
2023-02-27 39 -99.8 0.69 -2.56 0.02
2023-02-28 40 1.4 0.11 0.04 0.00
2023-03-01 20 35.1 0.78 1.76 0.04
2023-03-02 20 -13.2 0.10 -0.66 0.00
2023-03-04 40 29.2 0.56 0.73 0.01
2023-03-05 20 107.0 1.36 5.35 0.07
2023-03-06 20 32.6 -0.33 1.63 -0.02
2023-03-08 80 -27.1 0.22 -0.34 0.00
2023-03-09 20 -42.3 0.92 -2.11 0.05
2023-03-11 7 21.0 0.50 3.00 0.07
2023-03-12 57 67.8 0.56 1.19 0.01
2023-03-13 16 -84.2 -0.50 -5.26 -0.03
2023-03-19 80 239.2 2.20 2.99 0.03
2023-03-20 40 -32.9 -0.07 -0.82 0.00
2023-03-26 160 25.3 3.13 0.16 0.02
2023-03-27 20 -167.4 -1.04 -8.37 -0.05
2023-03-29 80 -218.2 -0.47 -2.73 -0.01
2023-03-30 59 -279.9 -0.70 -4.74 -0.01

Top 10 picks from all drafts in terms of value

# Ten best individual picks, ranked by relative value (`rel_value`, highest
# first); logo and headshot URL columns are rendered as images.
exposure %>%
  arrange(desc(rel_value)) %>%
  slice_head(n = 10) %>%
  select(
    name, team_logo_espn, espn_headshot,
    Pick.Number, adp, value, rel_value, Picked.At
  ) %>%
  gt() %>%
  gt_img_rows(columns = "team_logo_espn", height = 50) %>%
  gt_img_rows(columns = "espn_headshot", height = 50) %>%
  gt_theme_dark()
name team_logo_espn espn_headshot Pick.Number adp value rel_value Picked.At
Aaron Judge 4 1.2 2.8 2.33 2023-02-22
Juan Soto 6 2.5 3.5 1.40 2023-03-26
Ronald Acuña Jr. 6 2.9 3.1 1.07 2023-03-09
Aaron Judge 2 1.2 0.8 0.67 2023-03-30
Aaron Judge 2 1.2 0.8 0.67 2023-02-21
Juan Soto 4 2.5 1.5 0.60 2023-02-27
Julio Rodríguez 8 5.4 2.6 0.48 2023-02-22
Jarred Kelenic 225 153.5 71.5 0.47 2023-02-23
Shohei Ohtani 7 4.8 2.2 0.46 2023-02-22
Jarred Kelenic 221 153.5 67.5 0.44 2023-03-04

Picks by MLB team drafted

# Ten MLB teams with the most drafted players across all drafts.
exposure %>%
  group_by(Team, team_logo_espn) %>%
  # drop the grouping explicitly instead of relying on summarise()'s default
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count)) %>%
  # show the logo column under the header "team"
  rename(team = team_logo_espn) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = team) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
Team team count
STL 116
NYM 95
SD 89
LAD 85
ATL 79
LAA 73
TOR 68
NYY 56
SEA 56
HOU 55

Picks by position

# Number of picks per listed position and each position's share of all picks.
exposure %>%
  group_by(Position) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(own = round(prop.table(count), digits = 2)) %>%
  gt() %>%
  gt_theme_dark()
Position count own
SP 430 0.30
RF 171 0.12
LF 165 0.11
3B 162 0.11
1B 114 0.08
CF 102 0.07
SS 102 0.07
2B 94 0.07
C 59 0.04
DH 39 0.03

Number of stacked batters by team

# For every MLB team, count the number of drafts in which at least one of its
# players was picked.
# NOTE(review): the heading says "stacked batters", but nothing here excludes
# pitchers or requires more than one player per team -- confirm the intent.
exposure %>%
  # one row per draft/team pair; the per-draft pick count is not needed
  distinct(Draft, Team, team_logo_espn) %>%
  group_by(Team, team_logo_espn) %>%
  # "drop_last" keeps the result grouped by Team, so gt() continues to show
  # each team abbreviation as a row-group label above its logo and count
  summarise(count = n(), .groups = "drop_last") %>%
  arrange(desc(count)) %>%
  gt() %>%
  gt_img_rows(columns = team_logo_espn) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Draft', 'Team'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
team_logo_espn count
STL
47
NYM
44
ATL
41
LAD
41
SD
40
TOR
38
LAA
36
MIL
35
HOU
34
TB
34
SEA
33
SF
33
MIN
32
PHI
32
BOS
29
CLE
29
NYY
29
BAL
26
ARI
25
MIA
24
CWS
23
KC
23
TEX
23
COL
18
CHC
17
PIT
17
DET
12
CIN
10
WSH
2
OAK
1

Creating objects to merge to the drafts dataframe

# display order of the position groups inside a configuration code
ord <- c("P", "IF", "OF")

# Roster configuration per draft: the pick counts of the position groups,
# ordered as in `ord`, pasted together and converted to a number (e.g. counts
# 6, 7 and 7 become 677). The digits are only unambiguous while every count is
# below 10 and every draft has picks in each group.
exposure_config <- exposure %>%
  group_by(Draft, positionGroup) %>%
  # drop the grouping explicitly instead of relying on summarise()'s default
  summarise(count = n(), .groups = "drop") %>%
  arrange(Draft, factor(positionGroup, levels = ord)) %>%
  group_by(Draft) %>%
  summarise(config = as.numeric(paste0(count, collapse = "")))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# How many drafts use each roster configuration, most common first.
exposure_config %>%
  count(config, name = "count") %>%
  arrange(desc(count))
## # A tibble: 9 × 2
##   config count
##    <dbl> <int>
## 1    677    27
## 2    686    18
## 3    587    11
## 4    776     8
## 5    578     3
## 6    767     2
## 7    586     1
## 8    676     1
## 9    965     1
# Stacked batters per draft: count the non-pitchers per draft and MLB team,
# keep teams with more than one, and sum those counts per draft.
exposure_batters <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  # drop the grouping explicitly instead of relying on summarise()'s default
  summarise(batters = n(), .groups = "drop") %>%
  filter(batters > 1) %>%
  group_by(Draft) %>%
  summarise(batters = sum(batters)) %>%
  arrange(desc(batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Biggest stack per draft: the largest number of non-pitchers taken from a
# single MLB team within that draft.
exposure_big_stack <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  # drop the grouping explicitly instead of relying on summarise()'s default
  summarise(batters = n(), .groups = "drop") %>%
  group_by(Draft) %>%
  summarise(big_stack = max(batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Number of MLB teams per draft that contribute more than one non-pitcher,
# i.e. the number of stacks in the draft.
exposure_num_teams <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  # drop the grouping explicitly instead of relying on summarise()'s default
  summarise(batters = n(), .groups = "drop") %>%
  filter(batters > 1) %>%
  group_by(Draft) %>%
  summarise(teams_stacked = n())
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Player taken with a pick number below 13 in each draft, stored as
# `first_pick` next to the draft id (presumably the first round of a 12-team
# draft -- TODO confirm).
first_pick <- exposure %>%
  filter(Pick.Number < 13) %>%
  select(first_pick = name, Draft)

# One row per draft: pick count, summed value / relative value, per-pick
# averages, date of the draft's last row, roster configuration, stacking
# metrics and first pick. `file` concatenates the descriptive fields into a
# single label for the draft. Rows are ordered by relative value per pick,
# best first.
drafts <- exposure %>%
  group_by(Draft) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value),
    # rel_value is already rounded to two decimals, so rounding the sum only
    # strips floating-point residue (e.g. 5.2e-18 instead of 0) that would
    # otherwise push the whole column into scientific notation when printed
    total_rel_value = round(sum(rel_value), digits = 2),
    Picked.At = last(Picked.At)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  ) %>%
  arrange(desc(rel_value_per_pick)) %>%
  left_join(exposure_config, by = c("Draft")) %>%
  left_join(exposure_batters, by = c("Draft")) %>%
  left_join(exposure_big_stack, by = c("Draft")) %>%
  left_join(exposure_num_teams, by = c("Draft")) %>%
  left_join(first_pick, by = c("Draft")) %>%
  mutate(
    file = paste(
      Picked.At, config, teams_stacked, batters, big_stack, first_pick
    )
  )

# Show each draft's label together with its value totals and date.
gt(select(drafts, file, total_value, total_rel_value, Picked.At))
file total_value total_rel_value Picked.At
2023-02-22 587 5 11 3 Aaron Judge 147.1 3.58000e+00 2023-02-22
2023-02-22 686 3 10 4 Mike Trout 33.8 1.54000e+00 2023-02-22
2023-02-25 686 4 10 3 Shohei Ohtani 120.5 1.62000e+00 2023-02-25
2023-03-05 686 2 8 5 Mookie Betts 107.0 1.36000e+00 2023-03-05
2023-03-26 776 3 11 4 Juan Soto -7.6 1.37000e+00 2023-03-26
2023-02-22 776 3 7 3 Julio Rodríguez 46.1 1.18000e+00 2023-02-22
2023-02-23 677 4 11 3 Mookie Betts 112.4 1.18000e+00 2023-02-23
2023-03-26 677 3 9 4 Mookie Betts 98.8 1.10000e+00 2023-03-26
2023-03-08 686 6 13 3 Shohei Ohtani 33.3 9.90000e-01 2023-03-08
2023-03-09 587 6 15 3 Ronald Acuña Jr. -42.3 9.20000e-01 2023-03-09
2023-03-19 677 3 8 4 Juan Soto 140.7 1.03000e+00 2023-03-19
2023-03-01 578 5 14 4 José Ramírez 35.1 7.80000e-01 2023-03-01
2023-03-04 677 5 13 4 Julio Rodríguez 70.6 7.90000e-01 2023-03-04
2023-03-29 677 5 12 4 Mike Trout 27.1 9.00000e-01 2023-03-29
2023-02-22 587 4 10 3 Shohei Ohtani 49.6 9.00000e-01 2023-02-22
2023-02-25 677 4 10 3 Mookie Betts 57.0 8.40000e-01 2023-02-25
2023-03-26 686 4 10 3 Yordan Alvarez -6.7 7.70000e-01 2023-03-26
2023-02-21 776 4 9 3 Aaron Judge -50.4 5.90000e-01 2023-02-21
2023-02-23 677 4 11 5 Vladimir Guerrero Jr. 67.6 6.40000e-01 2023-02-23
2023-02-25 677 4 9 3 Mike Trout -3.2 6.40000e-01 2023-02-25
2023-03-12 686 6 13 3 Aaron Judge 65.3 5.80000e-01 2023-03-12
2023-02-27 586 4 11 4 Juan Soto -81.9 6.30000e-01 2023-02-27
2023-03-26 677 4 12 4 Julio Rodríguez 44.1 3.50000e-01 2023-03-26
2023-02-23 686 3 12 5 Mookie Betts 63.5 3.60000e-01 2023-02-23
2023-02-23 677 4 12 4 José Ramírez 9.5 4.70000e-01 2023-02-23
2023-03-19 677 4 11 4 Trea Turner 45.3 4.50000e-01 2023-03-19
2023-03-19 677 4 9 3 Shohei Ohtani 69.6 4.80000e-01 2023-03-19
2023-03-29 677 3 11 5 Mike Trout 5.8 3.20000e-01 2023-03-29
2023-03-12 686 3 12 6 Shohei Ohtani 21.1 4.70000e-01 2023-03-12
2023-03-12 686 4 14 4 Shohei Ohtani 10.7 3.00000e-01 2023-03-12
2023-03-19 686 3 10 4 Julio Rodríguez -16.4 2.40000e-01 2023-03-19
2023-02-21 776 3 6 2 Trea Turner -87.0 2.10000e-01 2023-02-21
2023-02-19 965 2 4 2 Kyle Tucker -21.0 2.50000e-01 2023-02-19
2023-02-21 677 2 4 2 Mike Trout -26.7 2.80000e-01 2023-02-21
2023-02-25 677 4 11 4 Aaron Judge 7.0 2.00000e-01 2023-02-25
2023-02-23 677 5 13 3 José Ramírez -4.7 2.30000e-01 2023-02-23
2023-02-28 677 2 9 7 José Ramírez -10.5 2.10000e-01 2023-02-28
2023-02-21 776 1 6 6 Mike Trout 0.9 -7.00000e-02 2023-02-21
2023-03-29 776 4 10 4 Juan Soto -58.7 5.20417e-18 2023-03-29
2023-02-26 677 4 11 3 José Ramírez -29.3 4.00000e-02 2023-02-26
2023-03-26 686 3 8 4 Shohei Ohtani 0.2 3.00000e-02 2023-03-26
2023-02-22 767 4 9 3 Vladimir Guerrero Jr. -61.9 -8.00000e-02 2023-02-22
2023-02-21 776 1 3 3 Kyle Tucker 21.3 8.00000e-02 2023-02-21
2023-03-02 677 5 13 5 Yordan Alvarez -13.2 1.00000e-01 2023-03-02
2023-03-20 587 3 12 6 Juan Soto -20.5 9.00000e-02 2023-03-20
2023-02-23 587 3 11 4 Mike Trout -25.1 -1.00000e-02 2023-02-23
2023-03-08 587 4 11 5 Mookie Betts -0.6 -9.00000e-02 2023-03-08
2023-03-30 676 4 11 4 Aaron Judge -92.8 3.00000e-02 2023-03-30
2023-03-26 677 4 11 4 Vladimir Guerrero Jr. -31.2 8.00000e-02 2023-03-26
2023-02-27 587 4 12 5 Juan Soto -17.9 6.00000e-02 2023-02-27
2023-02-28 677 3 9 5 José Ramírez 11.9 -1.00000e-01 2023-02-28
2023-02-25 587 4 12 4 Shohei Ohtani -27.3 6.00000e-02 2023-02-25
2023-02-25 677 4 12 4 Shohei Ohtani -35.4 -2.70000e-01 2023-02-25
2023-02-21 767 3 6 2 Kyle Tucker -51.7 -1.90000e-01 2023-02-21
2023-03-04 587 5 14 5 Aaron Judge -41.4 -2.30000e-01 2023-03-04
2023-03-30 686 5 13 4 Trea Turner -80.6 -1.10000e-01 2023-03-30
2023-03-20 677 3 12 5 José Ramírez -12.4 -1.60000e-01 2023-03-20
2023-03-26 587 6 14 3 Mike Trout -17.6 -1.40000e-01 2023-03-26
2023-02-21 776 3 7 3 Shohei Ohtani -13.2 -3.60000e-01 2023-02-21
2023-03-06 686 4 11 3 Ronald Acuña Jr. 32.6 -3.30000e-01 2023-03-06
2023-02-22 677 3 10 5 José Ramírez -76.0 -4.10000e-01 2023-02-22
2023-03-08 686 4 12 4 Ronald Acuña Jr. -32.5 -3.80000e-01 2023-03-08
2023-03-08 686 4 11 4 Juan Soto -27.3 -3.00000e-01 2023-03-08
2023-03-26 686 3 8 4 Mookie Betts -54.7 -4.30000e-01 2023-03-26
2023-03-30 686 3 11 7 Yordan Alvarez -106.5 -6.20000e-01 2023-03-30
2023-02-23 587 4 11 4 Mike Trout -82.5 -5.10000e-01 2023-02-23
2023-02-23 578 3 8 4 Juan Soto -77.8 -7.00000e-01 2023-02-23
2023-03-13 677 5 14 3 Mike Trout -92.5 -7.90000e-01 2023-03-13
2023-03-27 677 4 8 2 Vladimir Guerrero Jr. -167.4 -1.04000e+00 2023-03-27
2023-02-22 686 3 6 2 Gerrit Cole -101.8 -9.10000e-01 2023-02-22
2023-02-23 578 3 12 5 Yordan Alvarez -87.1 -1.13000e+00 2023-02-23
2023-03-29 677 4 12 4 Paul Goldschmidt -192.4 -1.69000e+00 2023-03-29
# Split the picks into a list with one data frame per draft; the elements are
# initially named by draft id.
exp_list <- split(exposure, exposure$Draft)

# Lookup from draft id to the descriptive label built in `drafts$file`
name_mapping <- data.frame(
  old_names = drafts$Draft,
  new_names = drafts$file,
  stringsAsFactors = FALSE
)

# Position of each list name in the lookup (NA when the id is not in `drafts`)
name_indices <- match(names(exp_list), name_mapping$old_names)

# Rename only the elements that have a label; unmatched ones keep their id
has_new_name <- !is.na(name_indices)
names(exp_list)[has_new_name] <-
  name_mapping$new_names[name_indices[has_new_name]]